{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0          6                           148              72   \n",
       "1          1                            85              66   \n",
       "2          8                           183              64   \n",
       "3          1                            89              66   \n",
       "4          0                           137              40   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin   BMI  \\\n",
       "0                           35              0  33.6   \n",
       "1                           29              0  26.6   \n",
       "2                            0              0  23.3   \n",
       "3                           23             94  28.1   \n",
       "4                           35            168  43.1   \n",
       "\n",
       "   Diabetes_pedigree_function  Age  Target  \n",
       "0                       0.627   50       1  \n",
       "1                       0.351   31       0  \n",
       "2                       0.672   32       1  \n",
       "3                       0.167   21       0  \n",
       "4                       2.288   33       1  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the dataset from pima-indians-diabetes.csv (path is relative to the working directory)\n",
    "train = pd.read_csv(\"pima-indians-diabetes.csv\")\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3.845052</td>\n",
       "      <td>120.894531</td>\n",
       "      <td>69.105469</td>\n",
       "      <td>20.536458</td>\n",
       "      <td>79.799479</td>\n",
       "      <td>31.992578</td>\n",
       "      <td>0.471876</td>\n",
       "      <td>33.240885</td>\n",
       "      <td>0.348958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.369578</td>\n",
       "      <td>31.972618</td>\n",
       "      <td>19.355807</td>\n",
       "      <td>15.952218</td>\n",
       "      <td>115.244002</td>\n",
       "      <td>7.884160</td>\n",
       "      <td>0.331329</td>\n",
       "      <td>11.760232</td>\n",
       "      <td>0.476951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.078000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27.300000</td>\n",
       "      <td>0.243750</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>30.500000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>0.372500</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>140.250000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>127.250000</td>\n",
       "      <td>36.600000</td>\n",
       "      <td>0.626250</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>17.000000</td>\n",
       "      <td>199.000000</td>\n",
       "      <td>122.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>846.000000</td>\n",
       "      <td>67.100000</td>\n",
       "      <td>2.420000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "count  768.000000                    768.000000      768.000000   \n",
       "mean     3.845052                    120.894531       69.105469   \n",
       "std      3.369578                     31.972618       19.355807   \n",
       "min      0.000000                      0.000000        0.000000   \n",
       "25%      1.000000                     99.000000       62.000000   \n",
       "50%      3.000000                    117.000000       72.000000   \n",
       "75%      6.000000                    140.250000       80.000000   \n",
       "max     17.000000                    199.000000      122.000000   \n",
       "\n",
       "       Triceps_skin_fold_thickness  serum_insulin         BMI  \\\n",
       "count                   768.000000     768.000000  768.000000   \n",
       "mean                     20.536458      79.799479   31.992578   \n",
       "std                      15.952218     115.244002    7.884160   \n",
       "min                       0.000000       0.000000    0.000000   \n",
       "25%                       0.000000       0.000000   27.300000   \n",
       "50%                      23.000000      30.500000   32.000000   \n",
       "75%                      32.000000     127.250000   36.600000   \n",
       "max                      99.000000     846.000000   67.100000   \n",
       "\n",
       "       Diabetes_pedigree_function         Age      Target  \n",
       "count                  768.000000  768.000000  768.000000  \n",
       "mean                     0.471876   33.240885    0.348958  \n",
       "std                      0.331329   11.760232    0.476951  \n",
       "min                      0.078000   21.000000    0.000000  \n",
       "25%                      0.243750   24.000000    0.000000  \n",
       "50%                      0.372500   29.000000    0.000000  \n",
       "75%                      0.626250   41.000000    1.000000  \n",
       "max                      2.420000   81.000000    1.000000  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns\n",
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pregnants                         0\n",
      "Plasma_glucose_concentration      5\n",
      "blood_pressure                   35\n",
      "Triceps_skin_fold_thickness     227\n",
      "serum_insulin                   374\n",
      "BMI                              11\n",
      "Diabetes_pedigree_function        0\n",
      "Age                               0\n",
      "Target                            0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# In these measurement columns a value of 0 is treated as a missing reading,\n",
    "# so the zeros are converted to NaN to make them visible to pandas' null handling.\n",
    "NaN_col_names = ['Plasma_glucose_concentration','blood_pressure','Triceps_skin_fold_thickness','serum_insulin','BMI']\n",
    "# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0\n",
    "train[NaN_col_names] = train[NaN_col_names].replace(0, np.nan)\n",
    "# Per-column count of missing values after the replacement\n",
    "print(train.isnull().sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 新增一个特征，表示其是否缺失"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   serum_insulin_missing  serum_insulin\n",
      "0                      1            NaN\n",
      "1                      1            NaN\n",
      "2                      1            NaN\n",
      "3                      0           94.0\n",
      "4                      0          168.0\n",
      "5                      1            NaN\n",
      "6                      0           88.0\n",
      "7                      1            NaN\n",
      "8                      0          543.0\n",
      "9                      1            NaN\n",
      "   Triceps_skin_fold_thickness_missing  Triceps_skin_fold_thickness\n",
      "0                                    0                         35.0\n",
      "1                                    0                         29.0\n",
      "2                                    1                          NaN\n",
      "3                                    0                         23.0\n",
      "4                                    0                         35.0\n",
      "5                                    1                          NaN\n",
      "6                                    0                         32.0\n",
      "7                                    1                          NaN\n",
      "8                                    0                         45.0\n",
      "9                                    1                          NaN\n"
     ]
    }
   ],
   "source": [
    "# Add a 0/1 indicator column per feature: 1 where the value is missing, 0 otherwise\n",
    "for i in ['serum_insulin', 'Triceps_skin_fold_thickness']:\n",
    "    # vectorized null test instead of a per-element Python lambda; same int64 result\n",
    "    train[i+'_missing'] = train[i].isnull().astype('int64')\n",
    "    # show the indicator next to the original values as a quick sanity check\n",
    "    print(train[[i+'_missing', i]].head(10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "#color = sns.color_palette()\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1d30c13d5f8>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEHCAYAAABBW1qbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAaIElEQVR4nO3de5QV5Z3u8e8jF8FIglz0IA1CFJ2BGBpt0UxijhfOAckYMRcXzGS8B7MWScxk5GhmZSkSXSvJJNE4JjgYFYwO6jFR0fFuNMTMKDaICBiWGD2yBQVRiQyi0v7OH/V2uW12Nxvo6t3Qz2etvXbVW29V/fZu6Kffqtq1FRGYmZkB7FXrAszMrPNwKJiZWc6hYGZmOYeCmZnlHApmZpbrXusCdsWAAQNi2LBhtS7DzGy3smjRotcjYmClZbt1KAwbNozGxsZal2FmtluR9P9aW+bDR2ZmlnMomJlZzqFgZma53fqcgplZrbz//vuUSiW2bNlS61Ja1atXL+rq6ujRo0fV6zgUzMx2QqlUok+fPgwbNgxJtS5nGxHBhg0bKJVKDB8+vOr1fPjIzGwnbNmyhf79+3fKQACQRP/+/Xd4JONQMDPbSZ01EJrtTH0OBTMzy/mcgplZO9iwYQMnnngiAK+++irdunVj4MDsQ8MLFy6kZ8+e7b7PxYsXs27dOiZMmNBu2+zyoXDk9BtrXUKnsehfTq91CWa7rf79+7NkyRIAZsyYwb777ssFF1xQ9fpNTU1069Zth/a5ePFili1b1q6h4MNHZmYFO/nkkznyyCMZNWoUv/rVrwDYunUrffv25fvf/z5jx45l4cKFzJ8/n8MOO4xjjz2Wb33rW0yaNAmATZs2ceaZZzJ27FjGjBnD3XffzTvvvMPMmTO5+eabqa+v5/bbb2+XWrv8SMHMrGhz586lX79+bN68mYaGBr785S/Tp08fNm7cyBFHHMFll13G5s2bOfTQQ/njH//I0KFDOe200/L1Z86cyYQJE5gzZw5vvvkmRx99NEuXLuXiiy9m2bJlXHnlle1Wa2EjBUm9JC2U9Iyk5ZIuTe1zJL0oaUl61Kd2SbpK0ipJSyUdUVRtZmYd6YorrmD06NF85jOfoVQq8cILLwDQs2dPTj31VABWrFjBYYcdxkEHHYQkpkyZkq//4IMPcvnll1NfX8/xxx/Pli1bePnllwuptciRwrvACRGxSVIP4HFJ96Vl0yOi5VjnJGBEehwNzErPZma7rYcffpgFCxbwxBNP0Lt3bz73uc/lnx3o3bt3ftloRLS6jYjgzjvv5OCDD/5I+4IFC9q93sJGCpHZlGZ7pEfrrxpOAW5M6z0B9JU0qKj6zMw6wsaNG+nXrx+9e/dm+fLlPPXUUxX7jRo1ipUrV7J69WoigltvvTVfNn78eK666qp8/umnnwagT58+vP322+1ab6EnmiV1k7QEWAc8FBFPpkWXp0NEV0jaO7UNBlaXrV5KbS23OVVSo6TG9evXF1m+mdku+8IXvsDmzZsZPXo0M2fO5OijKx8A2Weffbj66qsZN24cxx57LAceeCCf+MQnALjkkkvYvHkzhx9+OKNGjWLGjBkAnHDCCTzzzDOMGTNm9zjRHBFNQL2kvsAdkj4FfA94FegJzAYuBGYClT56t83IIiJmp/VoaGhoa+RhZlYTzb+0Ibsp3QMPPFCx31tvvfWR+XHjxrFy5UoigvPOO4+GhgYAPvaxj3Httddus/7AgQPb/YvGOuSS1Ih4C3gMmBARa9MhoneBG4CxqVsJGFK2Wh2wpiPqMzPrDGbNmkV9fT0jR47knXfe4etf/3qH11DYSEHSQOD9iHhLUm9gHPAjSYMiYq2ysyuTgGVplfnANyXdQnaCeWNErC2qPjOzzmb69OlMnz69pjUUefhoEDBXUjeyEcltEXGPpN+lwBCwBPhG6n8vMBFYBWwGziqwNjMzq6CwUIiI
pcCYCu0ntNI/gGlF1WNmZtvn21yYmVnOoWBmZjnf+8jMrB209x2Xq7lr8f3338/5559PU1MT5557LhdddNEu79cjBTOz3VBTUxPTpk3jvvvuY8WKFcybN48VK1bs8nYdCmZmu6GFCxdyyCGH8MlPfpKePXsyefJk7rrrrl3erkPBzGw39MorrzBkyIef962rq+OVV17Z5e06FMzMdkOV7qrafMfVXeFQMDPbDdXV1bF69Yf3EC2VShx44IG7vF2HgpnZbuioo47i+eef58UXX+S9997jlltu4Ytf/OIub9eXpJqZtYNqLiFtT927d+fqq69m/PjxNDU1cfbZZzNq1Khd32471GZmZjUwceJEJk6c2K7b9OEjMzPLORTMzCznUDAzs5xDwczMcg4FMzPLORTMzCznS1LNzNrByzMPb9ftDb342e32Ofvss7nnnnvYf//9WbZs2Xb7V8MjBTOz3dSZZ57J/fff367bdCiYme2mPv/5z9OvX7923WZhoSCpl6SFkp6RtFzSpal9uKQnJT0v6VZJPVP73ml+VVo+rKjazMyssiJHCu8CJ0TEaKAemCDpGOBHwBURMQJ4Ezgn9T8HeDMiDgGuSP3MzKwDFRYKkdmUZnukRwAnALen9rnApDR9SponLT9R7XFzcDMzq1qh5xQkdZO0BFgHPAS8ALwVEVtTlxIwOE0PBlYDpOUbgf4VtjlVUqOkxvXr1xdZvplZl1PoJakR0QTUS+oL3AH8daVu6bnSqGCbrxaKiNnAbICGhoZtv3rIzKwGqrmEtL1NmTKFxx57jNdff526ujouvfRSzjnnnO2v2IYO+ZxCRLwl6THgGKCvpO5pNFAHrEndSsAQoCSpO/AJ4I2OqM/MbHc0b968dt9mkVcfDUwjBCT1BsYBzwGPAl9J3c4A7krT89M8afnvotKXkJqZWWGKHCkMAuZK6kYWPrdFxD2SVgC3SLoMeBq4LvW/Dvi1pFVkI4TJBdZmZmYVFBYKEbEUGFOh/c/A2ArtW4CvFlWPmVl7iwg680WSO3OwxZ9oNjPbCb169WLDhg079Yu3I0QEGzZsoFevXju0nm+IZ2a2E+rq6iiVSnTmS+N79epFXV3dDq3jUDAz2wk9evRg+PDhtS6j3fnwkZmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZrnCQkHSEEmPSnpO0nJJ56f2GZJekbQkPSaWrfM9SaskrZQ0vqjazMyssiK/eW0r8E8RsVhSH2CRpIfSsisi4iflnSWNBCYDo4ADgYclHRoRTQXWaGZmZQobKUTE2ohYnKbfBp4DBrexyinALRHxbkS8CKwCxhZVn5mZbatDzilIGgaMAZ5MTd+UtFTS9ZL2S22DgdVlq5WoECKSpkpqlNTYmb8w28xsd1R4KEjaF/gN8J2I+AswCzgYqAfWAj9t7lph9dimIWJ2RDRERMPAgQMLqtrMrGsqNBQk9SALhJsj4rcAEfFaRDRFxAfAtXx4iKgEDClbvQ5YU2R9Zmb2UUVefSTgOuC5iPhZWfugsm6nAsvS9HxgsqS9JQ0HRgALi6rPzMy2VeTVR58F/gF4VtKS1PbPwBRJ9WSHhl4CzgOIiOWSbgNWkF25NM1XHpmZdazCQiEiHqfyeYJ721jncuDyomoyM7O2+RPNZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVmuqlCQ9Eg1bWZmtntr8zuaJfUC9gEGSNqPD79z+ePAgQXXZmZmHWx7I4XzgEXAX6Xn5sddwC/aWlHSEEmPSnpO0nJJ56f2fpIekvR8et4vtUvSVZJWSVoq6YhdfXFmZrZj2gyFiPh5RAwHLoiI
T0bE8PQYHRFXb2fbW4F/ioi/Bo4BpkkaCVwEPBIRI4BH0jzAScCI9JgKzNr5l2VmZjujzcNHzSLiXyX9DTCsfJ2IuLGNddYCa9P025KeAwYDpwDHpW5zgceAC1P7jRERwBOS+koalLZjZmYdoKpQkPRr4GBgCdCUmgNoNRRarD8MGAM8CRzQ/Is+ItZK2j91GwysLlutlNo+EgqSppKNJBg6dGg1uzczsypVFQpAAzAy/RW/QyTtC/wG+E5E/EVSq10rtG2zv4iYDcwGaGho2OF6zMysddV+TmEZ8D92dOOSepAFws0R8dvU/JqkQWn5IGBdai8BQ8pWrwPW7Og+zcxs51UbCgOAFZIekDS/+dHWCsqGBNcBz0XEz8oWzQfOSNNnkF3J1Nx+eroK6Rhgo88nmJl1rGoPH83YiW1/FvgH4FlJS1LbPwM/BG6TdA7wMvDVtOxeYCKwCtgMnLUT+zQzs11Q7dVHv9/RDUfE41Q+TwBwYoX+AUzb0f2YmVn7qfbqo7f58KRvT6AH8N8R8fGiCjMzs45X7UihT/m8pEnA2EIqMjOzmtmpu6RGxJ3ACe1ci5mZ1Vi1h4++VDa7F9nnFvwZATOzPUy1Vx+dXDa9FXiJ7LYUZma2B6n2nIIvDzUz6wKqPXxUB/wr2WcPAngcOD8iSgXWZh3s5ZmH17qETmPoxc/WugSzmqj2RPMNZJ84PpDsJnV3pzYzM9uDVBsKAyPihojYmh5zgIEF1mVmZjVQbSi8Lulrkrqlx9eADUUWZmZmHa/aUDgbOA14lez7Db6C701kZrbHqfaS1B8AZ0TEm5B9zzLwE7KwMDOzPUS1I4VPNwcCQES8QfZNamZmtgepNhT2krRf80waKVQ7yjAzs91Etb/Yfwr8p6TbyT6ncBpweWFVmZlZTVT7ieYbJTWS3QRPwJciYkWhlZmZWYer+hBQCgEHgZnZHmynbp1tZmZ7JoeCmZnlHApmZpYrLBQkXS9pnaRlZW0zJL0iaUl6TCxb9j1JqyStlDS+qLrMzKx1RY4U5gATKrRfERH16XEvgKSRwGRgVFrnl5K6FVibmZlVUFgoRMQC4I0qu58C3BIR70bEi8AqYGxRtZmZWWW1OKfwTUlL0+Gl5k9JDwZWl/UppbZtSJoqqVFS4/r164uu1cysS+noUJgFHAzUk91t9aepXRX6RqUNRMTsiGiIiIaBA/2VDmZm7alDQyEiXouIpoj4ALiWDw8RlYAhZV3rgDUdWZuZmXVwKEgaVDZ7KtB8ZdJ8YLKkvSUNB0YACzuyNjMzK/BOp5LmAccBAySVgEuA4yTVkx0aegk4DyAilku6jew2GluBaRHRVFRtZmZWWWGhEBFTKjRf10b/y/GdV83MasrfiWDWSR05/cZal9BpLPqX02tdQpfh21yYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZjmHgpmZ5RwKZmaWcyiYmVnOoWBmZrnCQkHS9ZLWSVpW1tZP0kOSnk/P+6V2SbpK0ipJSyUdUVRdZmbWuiJHCnOACS3aLgIeiYgRwCNpHuAkYER6TAVmFViXmZm1orBQiIgFwBstmk8B5qbpucCksvYbI/ME0FfSoKJqMzOzyjr6nMIBEbEWID3vn9oHA6vL+pVSm5mZdaDOcqJZFdqiYkdpqqRGSY3r168vuCwzs66lo0PhtebDQul5XWovAUPK+tUBayptICJmR0RDRDQMHDiw0GLNzLqajg6F+cAZafoM4K6y9tPTVUjHABubDzOZmVnH6V7UhiXNA44DBkgqAZcAPwRuk3QO8DLw1dT9XmAisArYDJxVVF1mZta6wkIhIqa0sujECn0DmFZULWZmVp3OcqLZzMw6gcJGCmZm7eXlmYfXuoROY+jFzxa6fY8UzMws51AwM7OcQ8HMzHIOBTMzyzkUzMws51AwM7OcQ8HMzHIO
BTMzyzkUzMws51AwM7OcQ8HMzHIOBTMzyzkUzMws51AwM7OcQ8HMzHIOBTMzyzkUzMws51AwM7NcTb6OU9JLwNtAE7A1Ihok9QNuBYYBLwGnRcSbtajPzKyrquVI4fiIqI+IhjR/EfBIRIwAHknzZmbWgTrT4aNTgLlpei4wqYa1mJl1SbUKhQAelLRI0tTUdkBErAVIz/tXWlHSVEmNkhrXr1/fQeWamXUNNTmnAHw2ItZI2h94SNKfql0xImYDswEaGhqiqALNzLqimowUImJNel4H3AGMBV6TNAggPa+rRW1mZl1Zh4eCpI9J6tM8DfxvYBkwHzgjdTsDuKujazMz6+pqcfjoAOAOSc37//eIuF/SU8Btks4BXga+WoPazMy6tA4PhYj4MzC6QvsG4MSOrsfMzD7UmS5JNTOzGnMomJlZzqFgZmY5h4KZmeUcCmZmlnMomJlZzqFgZmY5h4KZmeUcCmZmlnMomJlZzqFgZmY5h4KZmeUcCmZmlnMomJlZzqFgZmY5h4KZmeUcCmZmlnMomJlZzqFgZmY5h4KZmeU6XShImiBppaRVki6qdT1mZl1JpwoFSd2AXwAnASOBKZJG1rYqM7Ouo1OFAjAWWBURf46I94BbgFNqXJOZWZfRvdYFtDAYWF02XwKOLu8gaSowNc1ukrSyg2rb4x0EA4DXa11Hp3CJal2BlfG/zTLt82/zoNYWdLZQqPRq4yMzEbOB2R1TTtciqTEiGmpdh1lL/rfZcTrb4aMSMKRsvg5YU6NazMy6nM4WCk8BIyQNl9QTmAzMr3FNZmZdRqc6fBQRWyV9E3gA6AZcHxHLa1xWV+LDctZZ+d9mB1FEbL+XmZl1CZ3t8JGZmdWQQ8HMzHIOBfOtRazTknS9pHWSltW6lq7CodDF+dYi1snNASbUuoiuxKFgvrWIdVoRsQB4o9Z1dCUOBat0a5HBNarFzGrMoWDbvbWImXUdDgXzrUXMLOdQMN9axMxyDoUuLiK2As23FnkOuM23FrHOQtI84L+AwySVJJ1T65r2dL7NhZmZ5TxSMDOznEPBzMxyDgUzM8s5FMzMLOdQMDOznEPBzMxyDoUuRlJ/SUvS41VJr5TN92zR9wFJfWpVa0uSHpdUX6F9p+qUNFLSM5KeljSslT7dJb3VyrKbJE1qY/vfldSriu1Mk/T3bWxnnKQ723otnYmkGyQdtoPrnCppelE1WfU61Xc0W/EiYgNQDyBpBrApIn5S3keSyD7DMr7jK9xxu1Dnl4DbI+IH7VlPme8C1wNb2uoUEb8oaP81ERFn7cQ6dxRRi+04jxQMAEmHSFom6RpgMTAofYK0b1p+lqSl6S/rG1LbAZJ+K6lR0kJJx6T2yyTNlfSopOclnZ3aB6e/9pekff1NK7V0l/RrSc+mft9usbxb+it9RpovSepb9hquk7Rc0n3Nf6lX2McXyT7J/Q1JD6e2/5PWXybpWxXW2UvSLyWtkHQ3MKCN9/Mfgf2BPzRvP7X/ML2H/yVp/7L36ztp+lBJv0t9FrccwUg6urk9rXedpN9L+rOkaWX9zkg/kyWp5r1ae18l/WN6Tc9IuqmN13SZpDmSHpT0kqRJkn6atvUfkrqnfo9Lqt+R/Uk6V9KVafomST+X9J/pdZ1a9nO/Jv1s75Z0v9oYqdlOigg/uugDmAFckKYPAT4AjipbXgL6AqOBPwH9Unvz863AMWl6GLAsTV9GFiy9yH4xloADgAuBC1OfbsC+rdR1NHBf2Xzf9Pw40JD2e2GFOg8B3gcOT+2/BSa38fovA76TpscCzwD7AH3IbvnxabLR9Fupz2nAfWR/TNUBfwEmtbH9Ulnt3cnuPntSmv8ZcFGFOhYBJ6fpXqmeccCdwLFAI1BXtt4fgJ7pfd6Q3tdPpf7dU7/ZwN+18b6uBXqWt7Xxfv0+vZYjgc3A/0rL7gb+tuznVL8j+wPOBa5M0zcB88ju4Ptp4E+pfXLaz17AgcDGtt5/P3bu4ZGClXsh
Ip6q0H4CcGtEvAHQ/Ez2y+oaSUvIfgntJ6l3WnZnRGyJiHXAAuAospvvnSvpEuBTEbGplTpWkd3r5ueSxpP95292HbA4In7U2roR8WyaXkQWVtU4FvhNRGyOiLfT6/lciz6fB+ZFxAcRUQIeq3Lbzd6JiPtaq03SfsCAiLgbIL1/m9PiTwG/JPvFWypb7Z6IeC+9z28AA8l+LkcBjeln8z+Bg2n9fV0O3KTsvMb723kN90Z2v6xnU40PpfZnW76eXdzfnZFZyoff7/E5sntzfRARa8gCytqZQ8HK/Xcr7aLydywIGBsR9ekxOCLeScta9o+I+B1wHNlfijerlZOrkZ33+DTZX5zfBv6tbPEfgRMl7d1Kre+WTTdR/XmzSt8rUbG8KvtV8l7ZdGu1tbb9NWn9lifaK71eAdeX/VwOi4gftPG+jgeuIRstNSr7itbWNO/vgxav54OWr2cX91f+utTi2QrkULBqPAxMltQPoPk5tZcfxy7/hTVJ0t6SBpAOe0g6CHg1ImaTfffumEo7kzSQ7ET3/wUuAY4oWzw77feW5mPY7WQBcKqk3pL2JftK0j9U6DM5HZ8fTPYXeFveJjsUVZWIeBN4XdLJAJJ6SdonLX4D+Fvgx5KO3c6mHgZOS+998xVnQyu9r+kXcl0K7OlkI419Wtvwjihgf48DX1FmENnIzdqZrz6y7YqIpZJ+DCyQtJXs0Mc5ZIEwS9JZZP+WHuXDkHiK7Pj7EOCSiHhN2Qnn70p6H9gEfK2VXQ4BrpPUPEK5sEU9P5Z0OTBH0unt9BoXKrtNc/Phs1kR8WyL4LkdOB5YBqwkC4m2zAYelrSa6r98/u+Bf0uv7z3gy2U1rlV2gvzetl53qvvStO+9yA7RfINsJNHyfe0O/LuyS3r3An6UDp+1h0o/x4r7y7ps121khzKb3/8n+eihRWsHvnW2tTtJlwGvR8SVta7F9iyS9o2ITWkU8iRwdESsr3VdexKPFMxsd3KfpI8DPchGoA6EduaRgtWUpEa2/ePk7yJiRTvu4xrgmBbNP4uIG9tp+/OBoS2aL4iIhyv17+wknUv2GY5yCyLi25X6257FoWBmZjlffWRmZjmHgpmZ5RwKZmaWcyiYmVnu/wOzaLn5UsvYDwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Count of each Target class, split by whether the skin-fold measurement is missing.\n",
    "# The trailing ';' keeps the Axes/Text repr out of the cell output.\n",
    "ax = sns.countplot(x=\"Triceps_skin_fold_thickness_missing\", hue=\"Target\", data=train)\n",
    "ax.set_title(\"Target counts by missing Triceps_skin_fold_thickness\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1d30a9fec88>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEHCAYAAABBW1qbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAVJ0lEQVR4nO3df5BV5Z3n8fc3/FhQWRVsXbFR0KC79BAabTHZxEyi7kjIRjSJjtQm0WiCM2tSZmq01smmlDBhN1uTxIzjjFVqHHDXn2t+iK7lj1AxVLJjEAjhhw6RRFdaUZAYooOotN/9454+XuE23Ia+fbvp96vq1r3nuc9z7vd2dfWnz3POfW5kJpIkAbyn2QVIkgYOQ0GSVDIUJEklQ0GSVDIUJEml4c0uYH8cccQROXHixGaXIUmDyooVK17OzJZazw3qUJg4cSLLly9vdhmSNKhExP/r6TmnjyRJJUNBklQyFCRJpUF9TkGSmuWtt96is7OTHTt2NLuUHo0aNYrW1lZGjBhR9xhDQZL2QWdnJ2PGjGHixIlERLPL2U1msnXrVjo7O5k0aVLd45w+kqR9sGPHDsaNGzcgAwEgIhg3blyvj2QMBUnaRwM1ELrtS32GgiSp5DkFSeoDW7du5cwzzwTgxRdfZNiwYbS0VD40vGzZMkaOHNnnr7ly5Uo2b97MzJkz+2yfQz4UTrnqtmaXMGCs+JvPNbsEadAaN24cq1atAmDevHkccsghXHnllXWP7+rqYtiwYb16zZUrV7J27do+DQWnjySpwT7xiU9wyimn0NbWxi233ALAzp07Oeyww/ja177GjBkzWLZsGYsXL+akk07i9NNP58tf/jLnnnsuAK+99hoXX3wxM2bMYPr06dx///28/vrrzJ8/n9tvv5329nbuvffePql1yB8pSFKjLVq0iLFjx7J9+3Y6Ojr41Kc+xZgxY9i2bRsnn3wy3/jGN9i+fTsnnngiP//5zzn22GO54IILyvHz589n5syZLFy4kFdeeYXTTjuN1atXc80117B27Vq++93v9lmtHilIUoNdd911TJs2jQ984AN0dnbym9/8BoCRI0dy3nnnAfDkk09y0kkncdxxxxERzJkzpxz/yCOPsGDBAtrb2/noRz/Kjh07eO655xpSq0cKktRAP/7xj1m6dCmPP/44o0eP5kMf+lD52YHRo0eXl41mZo/7yEx+9KMfccIJJ7yrfenSpX1er0cKktRA27ZtY+zYsYwePZp169bxxBNP1OzX1tbG+vXr2bhxI5nJ3XffXT539tlnc/3115fbv/zlLwEYM2YMr776ap/WayhIUgN9/OMfZ/v27UybNo358+dz2mmn1ex30EEHccMNN3DWWWdx+umnM378eA499FAArr32WrZv387UqVNpa2tj3rx5AJxxxhn86le/Yvr06Z5olqSBqvuPNlQWpXv44Ydr9vv973//ru2zzjqL9evXk5lcdtlldHR0AHDwwQdz88037za+paWlz79orGFHChExISJ+EhFPRcS6iLiiaJ8XEc9HxKriNqtqzF9FxIaIWB8RZzeqNkkaiG688Uba29uZMmUKr7/+Ol/84hf7vYZGHinsBP4yM1dGxBhgRUQ8Wjx3XWZ+q7pzREwBLgTagPHAjyPixMzsamCNkjRgXHXVVVx11VVNraFhRwqZuSkzVxaPXwWeAo7Zw5DZwF2Z+UZmPgNsAGY0qj5J0u765URzREwEpgO/KJq+FBGrI+LWiDi8aDsG2Fg1rJMaIRIRcyNieUQs37JlSwOrlqShp+GhEBGHAN8HvpKZfwBuBE4A2oFNwLe7u9YYvtuFu5l5U2Z2ZGZH92JTkqS+0dBQiIgRVALh9sz8AUBmvpSZXZn5NnAz70wRdQITqoa3Ai80sj5J0rs17ERzVD6m9z3gqcz8TlX70Zm5qdg8D1hbPF4M3BER36FyonkysKxR9UlSX+rr
FZfrWbX4oYce4oorrqCrq4svfOELXH311fv9uo28+uiDwGeBNRGxqmj7KjAnItqpTA09C1wGkJnrIuIe4EkqVy5d7pVHklRbV1cXl19+OY8++iitra2ceuqpnHPOOUyZMmW/9tuwUMjMn1H7PMGDexizAFjQqJok6UCxbNky3vve93L88ccDcOGFF3Lfffftdyi4zIUkDULPP/88Eya8cxq2tbWV559/fr/3ayhI0iBUa1XV7hVX94ehIEmDUGtrKxs3vvPRrs7OTsaPH7/f+zUUJGkQOvXUU3n66ad55plnePPNN7nrrrs455xz9nu/rpIqSX2gnktI+9Lw4cO54YYbOPvss+nq6uKSSy6hra1t//fbB7VJaoC+vu59MOvvP7iDxaxZs5g1a9beO/aC00eSpJKhIEkqGQqSpJKhIEkqGQqSpJKhIEkqeUmqJPWB5+ZP7dP9HXvNmr32ueSSS3jggQc48sgjWbt27V7718MjBUkapC6++GIeeuihPt2noSBJg9SHP/xhxo4d26f7NBQkSSVDQZJUMhQkSSVDQZJU8pJUSeoD9VxC2tfmzJnDY489xssvv0xraytf//rXufTSS/drn4aCJA1Sd955Z5/v0+kjSVLJUJAklQwFSdpHmdnsEvZoX+ozFCRpH4waNYqtW7cO2GDITLZu3cqoUaN6Nc4TzZK0D1pbW+ns7GTLli3NLqVHo0aNorW1tVdjDAVJ2gcjRoxg0qRJzS6jzzl9JEkqGQqSpJKhIEkqNSwUImJCRPwkIp6KiHURcUXRPjYiHo2Ip4v7w4v2iIjrI2JDRKyOiJMbVZskqbZGHinsBP4yM/8d8H7g8oiYAlwNLMnMycCSYhvgY8Dk4jYXuLGBtUmSamhYKGTmpsxcWTx+FXgKOAaYDSwqui0Czi0ezwZuy4rHgcMi4uhG1SdJ2l2/nFOIiInAdOAXwFGZuQkqwQEcWXQ7BthYNayzaNt1X3MjYnlELB/I1wdL0mDU8FCIiEOA7wNfycw/7KlrjbbdPiqYmTdlZkdmdrS0tPRVmZIkGhwKETGCSiDcnpk/KJpf6p4WKu43F+2dwISq4a3AC42sT5L0bo28+iiA7wFPZeZ3qp5aDFxUPL4IuK+q/XPFVUjvB7Z1TzNJkvpHI5e5+CDwWWBNRKwq2r4KfBO4JyIuBZ4Dzi+eexCYBWwAtgOfb2BtkqQaGhYKmfkzap8nADizRv8ELm9UPZKkvfMTzZKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSoZCpKkkqEgSSo1LBQi4taI2BwRa6va5kXE8xGxqrjNqnruryJiQ0Ssj4izG1WXJKlndYVCRCypp20XC4GZNdqvy8z24vZgsa8pwIVAWzHmHyJiWD21SZL6zh5DISJGRcRY4IiIODwixha3icD4PY3NzKXA7+qsYzZwV2a+kZnPABuAGXWOlST1kb0dKVwGrAD+bXHffbsP+Pt9fM0vRcTqYnrp8KLtGGBjVZ/Oom03ETE3IpZHxPItW7bsYwmSpFr2GAqZ+beZOQm4MjOPz8xJxW1aZt6wD693I3AC0A5sAr5dtEetl++hppsysyMzO1paWvahBElST4bX0ykz/y4i/j0wsXpMZt7WmxfLzJe6H0fEzcADxWYnMKGqayvwQm/2LUnaf3WFQkT8Tyr/4a8CuormBHoVChFxdGZuKjbPA7qvTFoM3BER36FyrmIysKw3+5Yk7b+6QgHoAKZkZs0pnVoi4k7gI1ROUncC1wIfiYh2KoHyLJVzFmTmuoi4B3gS2AlcnpldtfYrSWqcekNhLfBvqJwHqEtmzqnR/L099F8ALKh3/5KkvldvKBwBPBkRy4A3uhsz85yGVCVJaop6
Q2FeI4uQJA0M9V599NNGFyJJar56rz56lXc+NzASGAH8S2b+60YVJknqf/UeKYyp3o6Ic3EZCkn95Ln5U5tdwoBx7DVrGrr/fVolNTN/BJzRx7VIkpqs3umjT1ZtvofK5xbq/syCJGlwqPfqo09UPd5J5YNns/u8GklSU9V7TuHzjS5EktR89X7JTmtE/LD4JrWXIuL7EdHa6OIkSf2r3umjfwTuAM4vtj9TtP2HRhSl5vAKj3c0+goPaaCq9+qjlsz8x8zcWdwWAn6ZgSQdYOoNhZcj4jMRMay4fQbY2sjCJEn9r95QuAS4AHiRykqpnwY8+SxJB5h6zyn8NXBRZr4CEBFjgW9RCQtJ0gGi3iOF93UHAkBm/g6Y3piSJEnNUm8ovCciDu/eKI4U6j3KkCQNEvX+Yf828H8j4l4qy1tcgN+SJkkHnHo/0XxbRCynsgheAJ/MzCcbWpkkqd/VPQVUhIBBIEkHsH1aOluSdGAyFCRJJUNBklQyFCRJJUNBklQyFCRJJUNBklQyFCRJJUNBklQyFCRJpYaFQkTcGhGbI2JtVdvYiHg0Ip4u7g8v2iMiro+IDRGxOiJOblRdkqSeNfJIYSEwc5e2q4ElmTkZWFJsA3wMmFzc5gI3NrAuSVIPGhYKmbkU+N0uzbOBRcXjRcC5Ve23ZcXjwGERcXSjapMk1dbf5xSOysxNAMX9kUX7McDGqn6dRdtuImJuRCyPiOVbtmxpaLGSNNQMlBPNUaMta3XMzJsysyMzO1paWhpcliQNLf0dCi91TwsV95uL9k5gQlW/VuCFfq5Nkoa8/g6FxcBFxeOLgPuq2j9XXIX0fmBb9zSTJKn/1P3Na70VEXcCHwGOiIhO4Frgm8A9EXEp8BxwftH9QWAWsAHYDny+UXVJknrWsFDIzDk9PHVmjb4JXN6oWiRJ9RkoJ5olSQOAoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKhkKkqSSoSBJKg1vxotGxLPAq0AXsDMzOyJiLHA3MBF4FrggM19pRn2SNFQ180jho5nZnpkdxfbVwJLMnAwsKbYlSf1oIE0fzQYWFY8XAec2sRZJGpKaFQoJPBIRKyJibtF2VGZuAijuj2xSbZI0ZDXlnALwwcx8ISKOBB6NiH+ud2ARInMBjj322EbVJ0lDUlOOFDLzheJ+M/BDYAbwUkQcDVDcb+5h7E2Z2ZGZHS0tLf1VsiQNCf0eChFxcESM6X4M/AmwFlgMXFR0uwi4r79rk6ShrhnTR0cBP4yI7te/IzMfiogngHsi4lLgOeD8JtQmSUNav4dCZv4WmFajfStwZn/XI0l6x0C6JFWS1GSGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkqGgiSpZChIkkoDLhQiYmZErI+IDRFxdbPrkaShZECFQkQMA/4e+BgwBZgTEVOaW5UkDR0DKhSAGcCGzPxtZr4J3AXMbnJNkjRkDG92Abs4BthYtd0JnFbdISLmAnOLzdciYn0/1XbAOw6OAF5udh0DwrXR7ApUxd/NKn3zu3lcT08MtFCo9W7zXRuZNwE39U85Q0tELM/MjmbXIe3K383+M9CmjzqBCVXbrcALTapFkoacgRYKTwCTI2JSRIwELgQWN7kmSRoyBtT0UWbujIgvAQ8Dw4BbM3Ndk8saSpyW00Dl72Y/iczcey9J0pAw0KaPJElNZChIkkqGglxaRANWRNwaEZsjYm2zaxkqDIUhzqVF
NMAtBGY2u4ihxFCQS4towMrMpcDvml3HUGIoqNbSIsc0qRZJTWYoaK9Li0gaOgwFubSIpJKhIJcWkVQyFIa4zNwJdC8t8hRwj0uLaKCIiDuBfwJOiojOiLi02TUd6FzmQpJU8khBklQyFCRJJUNBklQyFCRJJUNBklQyFCRJJUNBKkTEn0XE5/p4nwsj4tPF41sauQJtRDwYEYf1ckyfv2cNbn5OQYNORAwvPnQ34EXEQuCBzLy32bVI9fBIQU0TEQdHxP+JiF9FxNqI+NOIOCUifhoRKyLi4Yg4uuj7WET8t4j4KXBF9X/gxfOvFfcfKcbfExG/johvRsR/iohlEbEmIk7YQz3zIuLKqtf7H8W4X0fE6UV7W9G2KiJWR8TkiJhY/SUwEXFlRMyrsf/HIqKju96IWFC898cj4qg91LUwIm6MiJ9ExG8j4o+LL595qgid7n7PRsQRtX6uxfPfjIgni7q/1Yv3fFDx81wdEXdHxC+634cOPIaCmmkm8EJmTsvMPwIeAv4O+HRmngLcCiyo6n9YZv5xZn57L/udBlwBTAU+C5yYmTOAW4Av96K+4cW4rwDXFm1/BvxtZrYDHVQWFNwXBwOPZ+Y0YCnwxb30Pxw4A/gL4H7gOqANmBoR7bv03e3nGhFjgfOAtsx8H/CNHl6n1nv+z8Arxbi/Bk7pxfvUIGMoqJnWAGcV/52eTmW11j8CHo2IVcDXqKza2u3uOvf7RGZuysw3gN8Aj1S93sRe1PeD4n5F1bh/Ar4aEf8FOC4zX+/F/qq9CTxQY/89uT8rc71rgJcyc01mvg2sqzH2XT/XzNwG/AHYAdwSEZ8EtvfwOrXe84eofPkSmbkWWL3Xd6dBy1BQ02Tmr6n817kG+O/Ap4B1mdle3KZm5p9UDfmXqsc7KX5/IyKAkVXPvVH1+O2q7beB4b0osXtcV/e4zLwDOAd4HXg4Is6orqUwqo59v5XvnNAr919HLdXvp3v7XWN3/blGxDXFOZgZwPeBc6kcle3pdaprqvWdGzpAGQpqmogYD2zPzP8FfAs4DWiJiA8Uz4+IiLYehj/LO9MYs4ERDS6Xoqbjgd9m5vVUlhh/H/AScGREjIuIfwX8x/6oZQ817vpzPTkiDgEOzcwHqUwN7TrltCc/Ay4o9j2FyrScDlC9+a9J6mtTgb+JiLeBt4A/p/Jf9/URcSiV38/vUpki2dXNwH0RsQxYwruPIhrpT4HPRMRbwIvA/Mx8KyLmA78AngH+uZ9q6Umtn+sYKj+vUVT+8/+LXuzvH4BFEbEa+CWV6aNtfVuyBgovSZW0RxExDBiRmTuKq7eWUDl5/2aTS1MDeKQgaW8OAn4SESOoHGX8uYFw4PJIQUNORPxX4Pxdmv93Zi6o1b+/DNS6NLQYCpKkklcfSZJKhoIkqWQoSJJKhoIkqfT/AcltyA+MaZv0AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Target class counts, split by the serum_insulin_missing flag column\n",
    "sns.countplot(data=train, x=\"serum_insulin_missing\", hue=\"Target\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the two *_missing helper columns; reassign rather than mutate in place\n",
    "# so any frame object displayed by an earlier cell is left untouched.\n",
    "train = train.drop(columns=[\"Triceps_skin_fold_thickness_missing\", \"serum_insulin_missing\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pregnants                       0\n",
      "Plasma_glucose_concentration    0\n",
      "blood_pressure                  0\n",
      "Triceps_skin_fold_thickness     0\n",
      "serum_insulin                   0\n",
      "BMI                             0\n",
      "Diabetes_pedigree_function      0\n",
      "Age                             0\n",
      "Target                          0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Fill NaNs with each column's median, then print per-column NaN counts as a check\n",
    "medians = train.median()\n",
    "train = train.fillna(value=medians)\n",
    "\n",
    "print(train.isna().sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the label column off from the feature columns\n",
    "X_train = train.drop(columns=[\"Target\"])\n",
    "y_train = train[\"Target\"]\n",
    "\n",
    "# Remember the feature column names for rebuilding a DataFrame after scaling\n",
    "feat_names = X_train.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the scaler on the features, then replace them with their scaled values\n",
    "ss_X = StandardScaler()\n",
    "ss_X.fit(X_train)\n",
    "X_train = ss_X.transform(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.639947</td>\n",
       "      <td>0.866045</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.166619</td>\n",
       "      <td>0.468492</td>\n",
       "      <td>1.425995</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.205066</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.852200</td>\n",
       "      <td>-0.365061</td>\n",
       "      <td>-0.190672</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.233880</td>\n",
       "      <td>2.016662</td>\n",
       "      <td>-0.693761</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-1.332500</td>\n",
       "      <td>0.604397</td>\n",
       "      <td>-0.105584</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.073567</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.695245</td>\n",
       "      <td>-0.540642</td>\n",
       "      <td>-0.633881</td>\n",
       "      <td>-0.920763</td>\n",
       "      <td>-1.041549</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.141852</td>\n",
       "      <td>0.504422</td>\n",
       "      <td>-2.679076</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>0.316566</td>\n",
       "      <td>1.549303</td>\n",
       "      <td>5.484909</td>\n",
       "      <td>-0.020496</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0   0.639947                      0.866045       -0.031990   \n",
       "1  -0.844885                     -1.205066       -0.528319   \n",
       "2   1.233880                      2.016662       -0.693761   \n",
       "3  -0.844885                     -1.073567       -0.528319   \n",
       "4  -1.141852                      0.504422       -2.679076   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin       BMI  \\\n",
       "0                     0.670643      -0.181541  0.166619   \n",
       "1                    -0.012301      -0.181541 -0.852200   \n",
       "2                    -0.012301      -0.181541 -1.332500   \n",
       "3                    -0.695245      -0.540642 -0.633881   \n",
       "4                     0.670643       0.316566  1.549303   \n",
       "\n",
       "   Diabetes_pedigree_function       Age  Target  \n",
       "0                    0.468492  1.425995       1  \n",
       "1                   -0.365061 -0.190672       0  \n",
       "2                    0.604397 -0.105584       1  \n",
       "3                   -0.920763 -1.041549       0  \n",
       "4                    5.484909 -0.020496       1  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rebuild a labelled DataFrame from the scaled feature array\n",
    "X_train = pd.DataFrame(columns = feat_names, data=X_train)\n",
    "train = pd.concat([X_train, y_train], axis=1)\n",
    "# Save the processed dataset, then preview its first rows\n",
    "train.to_csv('FE_pima-indians-diabetes.csv', index=False, header=True)\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.639947</td>\n",
       "      <td>0.866045</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.166619</td>\n",
       "      <td>0.468492</td>\n",
       "      <td>1.425995</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.205066</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.852200</td>\n",
       "      <td>-0.365061</td>\n",
       "      <td>-0.190672</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.233880</td>\n",
       "      <td>2.016662</td>\n",
       "      <td>-0.693761</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-1.332500</td>\n",
       "      <td>0.604397</td>\n",
       "      <td>-0.105584</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.073567</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.695245</td>\n",
       "      <td>-0.540642</td>\n",
       "      <td>-0.633881</td>\n",
       "      <td>-0.920763</td>\n",
       "      <td>-1.041549</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.141852</td>\n",
       "      <td>0.504422</td>\n",
       "      <td>-2.679076</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>0.316566</td>\n",
       "      <td>1.549303</td>\n",
       "      <td>5.484909</td>\n",
       "      <td>-0.020496</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.342981</td>\n",
       "      <td>-0.185948</td>\n",
       "      <td>0.133453</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.997745</td>\n",
       "      <td>-0.818079</td>\n",
       "      <td>-0.275760</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-0.250952</td>\n",
       "      <td>-1.435189</td>\n",
       "      <td>-1.851862</td>\n",
       "      <td>0.329171</td>\n",
       "      <td>-0.610145</td>\n",
       "      <td>-0.211799</td>\n",
       "      <td>-0.676133</td>\n",
       "      <td>-0.616111</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1.827813</td>\n",
       "      <td>-0.218823</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.414047</td>\n",
       "      <td>-1.020427</td>\n",
       "      <td>-0.360847</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-0.547919</td>\n",
       "      <td>2.476909</td>\n",
       "      <td>-0.197433</td>\n",
       "      <td>1.808882</td>\n",
       "      <td>4.660524</td>\n",
       "      <td>-0.284572</td>\n",
       "      <td>-0.947944</td>\n",
       "      <td>1.681259</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0   0.639947                      0.866045       -0.031990   \n",
       "1  -0.844885                     -1.205066       -0.528319   \n",
       "2   1.233880                      2.016662       -0.693761   \n",
       "3  -0.844885                     -1.073567       -0.528319   \n",
       "4  -1.141852                      0.504422       -2.679076   \n",
       "5   0.342981                     -0.185948        0.133453   \n",
       "6  -0.250952                     -1.435189       -1.851862   \n",
       "7   1.827813                     -0.218823       -0.031990   \n",
       "8  -0.547919                      2.476909       -0.197433   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin       BMI  \\\n",
       "0                     0.670643      -0.181541  0.166619   \n",
       "1                    -0.012301      -0.181541 -0.852200   \n",
       "2                    -0.012301      -0.181541 -1.332500   \n",
       "3                    -0.695245      -0.540642 -0.633881   \n",
       "4                     0.670643       0.316566  1.549303   \n",
       "5                    -0.012301      -0.181541 -0.997745   \n",
       "6                     0.329171      -0.610145 -0.211799   \n",
       "7                    -0.012301      -0.181541  0.414047   \n",
       "8                     1.808882       4.660524 -0.284572   \n",
       "\n",
       "   Diabetes_pedigree_function       Age  Target  \n",
       "0                    0.468492  1.425995       1  \n",
       "1                   -0.365061 -0.190672       0  \n",
       "2                    0.604397 -0.105584       1  \n",
       "3                   -0.920763 -1.041549       0  \n",
       "4                    5.484909 -0.020496       1  \n",
       "5                   -0.818079 -0.275760       0  \n",
       "6                   -0.676133 -0.616111       1  \n",
       "7                   -1.020427 -0.360847       0  \n",
       "8                   -0.947944  1.681259       1  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first nine rows of the processed frame\n",
    "train.head(n=9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
